<?php


namespace Gaolei\Pscws\tools;


/**
 * Converts PSCWS dictionaries between the binary XDB format and a plain-text
 * format with one "WORD <sep> TF <sep> IDF <sep> ATTR" record per line.
 *
 * The class is a lazily created singleton (see {@see XdbTool::init()}). Both
 * public operations report their outcome as an array of the shape
 * `['code' => self::CODE_OKAY|self::CODE_FAIL, 'msg' => string]` instead of throwing.
 *
 * Every stored XDB value is 12 bytes, laid out as `pack('ffCa3', tf, idf, flag, attr)`:
 * flag bit 0x01 marks a real word (one that has a TF value) and bit 0x02 marks a
 * string that is a leading part of a longer word.
 *
 * NOTE(review): the XDB handle is only closed from the destructor; whether XTreeBD
 * needs an explicit Close() after make() to finalize the file cannot be seen from
 * this class - confirm against XTreeBD.
 */
class XdbTool
{

    /** @var self|null Shared instance, created on the first call to init(). */
    protected static $instance;

    public const CODE_FAIL = 'fail';
    public const CODE_OKAY = 'okay';

    /** @var XTreeBD|null Underlying XDB reader/writer. */
    protected $xdb;

    /** @var bool|null True to treat dictionary text as UTF-8, false for GBK. */
    protected $isUtf8;

    /** @var string Field separator used in the text dictionary format. */
    public $separator = "\t";

    /**
     * Creates the XDB handle. Protected so that instances are obtained through init().
     */
    protected function __construct()
    {
        $this->xdb = new XTreeBD();
    }

    /**
     * Closes the XDB handle, if one was created.
     */
    public function __destruct()
    {
        if ($this->xdb !== null) {
            $this->xdb->Close();
        }
    }

    /**
     * Returns the shared instance, creating it on the first call.
     *
     * On creation this also removes the script time limit and raises the memory
     * limit to 1024M. The $isUtf8 argument is only honoured by the call that
     * creates the instance; later calls return the existing instance unchanged.
     *
     * @param bool $isUtf8 True for UTF-8 dictionaries, false for GBK.
     * @return self
     */
    public static function init(bool $isUtf8 = true): self
    {
        if (self::$instance === null) {
            set_time_limit(0);
            ini_set('memory_limit', '1024M');
            $created = new self();
            $created->isUtf8 = $isUtf8;
            self::$instance = $created;
        }
        return self::$instance;
    }

    /**
     * Exports every real word of an XDB file to a text dictionary.
     *
     * Records whose value is not 12 bytes long, or whose flag lacks bit 0x01
     * (part-only entries), are skipped.
     *
     * @param string $xdbFile    Path of the XDB file to read.
     * @param string $outputFile Path of the text file to (over)write.
     * @return array{code: string, msg: string}
     */
    public function dump(string $xdbFile, string $outputFile): array
    {
        $resp = ['code' => self::CODE_FAIL, 'msg' => ''];
        if (!$this->xdb->Open($xdbFile)) {
            $resp['msg'] = "ERROR: input file {$xdbFile} maybe not a valid XDB file.";
            return $resp;
        }
        // The fopen() warning is suppressed because the failure is reported through $resp.
        $fd = @fopen($outputFile, 'w');
        if ($fd === false) {
            $resp['msg'] = "ERROR: can not open the output file: {$outputFile}";
            return $resp;
        }
        $sep = $this->separator;
        fwrite($fd, "# WORD{$sep}TF{$sep}IDF{$sep}ATTR\n");
        $this->xdb->Reset();
        while ($record = $this->xdb->Next()) {
            if (strlen($record['value']) !== 12) {
                continue;
            }
            $data = unpack('ftf/fidf/Cflag/a3attr', $record['value']);
            if (!($data['flag'] & 0x01)) {
                continue;
            }
            // unpack('a') keeps the NUL padding added by pack(); strip it so no
            // NUL bytes end up in the text file.
            $attr = substr(rtrim($data['attr'], "\0"), 0, 2);
            // Fields are formatted individually (not via one format string containing
            // the separator) so a '%' in the separator cannot corrupt the output.
            // %F is the locale-independent float format, keeping '.' as decimal point.
            $fields = [
                $record['key'],
                sprintf('%.2F', $data['tf']),
                sprintf('%.2F', $data['idf']),
                $attr,
            ];
            fwrite($fd, implode($sep, $fields) . "\n");
        }
        fclose($fd);
        $resp['code'] = self::CODE_OKAY;
        $resp['msg'] = "SUCCESS: Exported to {$outputFile} okay";
        return $resp;
    }

    /**
     * Builds an XDB file from a text dictionary.
     *
     * Requires the mbstring extension and sets the mbstring internal encoding
     * (UTF-8 or gbk) as a side effect. An existing file at $xdbFile is deleted,
     * but only after the text dictionary has been parsed successfully, so a bad
     * input path does not destroy a previously built dictionary.
     *
     * @param string $txtFile Path of the text dictionary to read.
     * @param string $xdbFile Path of the XDB file to create.
     * @return array{code: string, msg: string}
     */
    public function make(string $txtFile, string $xdbFile): array
    {
        $resp = ['code' => self::CODE_FAIL, 'msg' => ''];
        if (!extension_loaded('mbstring')) {
            $resp['msg'] = "Usage: mbstring extension is required.";
            return $resp;
        }
        mb_internal_encoding($this->isUtf8 ? 'UTF-8' : 'gbk');

        $parseWords = $this->getTxtWords($txtFile);
        if ($parseWords['code'] !== self::CODE_OKAY) {
            $resp['msg'] = $parseWords['msg'];
            return $resp;
        }
        $rec = $parseWords['data']['rec'];

        if (file_exists($xdbFile)) {
            unlink($xdbFile);
        }
        if (!$this->xdb->Open($xdbFile, 'w')) {
            $resp['msg'] = "ERROR: can not open the XDB to write: $xdbFile";
            return $resp;
        }
        $cnt = 0;
        // Write bucket by bucket, in ascending bucket order.
        for ($k = 0; $k < 0x40; $k++) {
            foreach ($rec[$k] ?? [] as $word => $info) {
                $flag = isset($info['tf']) ? 0x01 : 0;
                if (!empty($info['part'])) {
                    $flag |= 0x02;
                }
                $data = pack(
                    'ffCa3',
                    (float) ($info['tf'] ?? 0),
                    (float) ($info['idf'] ?? 0),
                    $flag,
                    (string) ($info['attr'] ?? '')
                );
                // PHP turns integer-like array keys ("123") into ints; hand the
                // key back to the XDB layer as the string it was read as.
                $this->xdb->Put((string) $word, $data);
                $cnt++;
            }
        }
        flush();
        $this->xdb->Optimize();
        $resp['code'] = self::CODE_OKAY;
        $resp['msg'] = sprintf("%d Records saved.\n", $cnt);
        return $resp;
    }

    /**
     * Parses a text dictionary into 64 buckets of word records.
     *
     * Lines starting with '#', lines with fewer than four separator-delimited
     * fields, and lines with an empty word are skipped. For every word longer
     * than two characters, each leading substring of two or more characters is
     * also recorded and marked with 'part' => 1.
     *
     * On success 'data' holds `['total' => int, 'rec' => array]`, where `rec` maps
     * bucket index (0..63, derived from the first two bytes of the word) to
     * `word => ['tf' => string, 'idf' => string, 'attr' => string, 'part' => 1]`
     * (each key present only when applicable).
     *
     * @param string $txtFile Path of the text dictionary to read.
     * @return array{code: string, msg: string, data?: array{total: int, rec: array}}
     */
    private function getTxtWords(string $txtFile): array
    {
        $resp = ['code' => self::CODE_FAIL, 'msg' => ''];
        $sep = (string) $this->separator;
        if ($sep === '') {
            // explode() cannot split on an empty separator.
            $resp['msg'] = "ERROR: the data separator must not be empty";
            return $resp;
        }
        // The fopen() warning is suppressed because the failure is reported through $resp.
        $fd = @fopen($txtFile, 'r');
        if ($fd === false) {
            $resp['msg'] = "ERROR: can not open the input file: {$txtFile}";
            return $resp;
        }
        $total = 0;
        $rec = [];
        // Compare against false explicitly: a final line consisting of "0" is falsy.
        while (($line = fgets($fd, 512)) !== false) {
            if (substr($line, 0, 1) === '#') {
                continue;
            }
            $fields = explode($sep, $line, 4);
            if (count($fields) < 4 || $fields[0] === '') {
                continue;
            }
            [$word, $tf, $idf, $attr] = $fields;
            // Bucket index: sum of the first two bytes (second byte counts as 0
            // for one-byte words), reduced to 6 bits.
            $k = (ord($word[0]) + (isset($word[1]) ? ord($word[1]) : 0)) & 0x3f;
            $attr = trim($attr);
            if (!isset($rec[$k])) {
                $rec[$k] = [];
            }
            if (!isset($rec[$k][$word])) {
                $total++;
                $rec[$k][$word] = [];
            }
            $rec[$k][$word]['tf'] = $tf;
            $rec[$k][$word]['idf'] = $idf;
            $rec[$k][$word]['attr'] = $attr;
            // Register every leading substring of at least two characters as a part.
            for ($len = mb_strlen($word) - 1; $len >= 2; $len--) {
                $part = mb_substr($word, 0, $len);
                if (!isset($rec[$k][$part])) {
                    $total++;
                    $rec[$k][$part] = [];
                }
                $rec[$k][$part]['part'] = 1;
            }
        }
        fclose($fd);
        $resp['code'] = self::CODE_OKAY;
        $resp['msg'] = "OK, we have {$total} words";
        $resp['data'] = compact('total', 'rec');
        return $resp;
    }


}